# Assemble the download URL from the repository's raw-content root and the
# CSV file name, then echo the result.
location <- "https://github.com/cqregan/uw_projects/raw/main/"
file <- "Use_Of_Force.csv"
link <- paste0(location, file)
print(link)
## [1] "https://github.com/cqregan/uw_projects/raw/main/Use_Of_Force.csv"
# Download the CSV behind `link` and load it as a data frame.
df <- read.csv(file = url(link))
# Echo the data frame, then list its column names.
print(df)
print(names(df))
## [1] "ID" "Incident_Num" "Incident_Type"
## [4] "Occured_date_time" "Precinct" "Sector"
## [7] "Beat" "Officer_ID" "Subject_ID"
## [10] "Subject_Race" "Subject_Gender"
# Compact per-column overview, each line cut at 70 characters.
str(df, width = 70, strict.width = "cut")
## 'data.frame': 15120 obs. of 11 variables:
## $ ID : chr "2015UOF-1456-1964-5409" "2017UOF-0168-1"..
## $ Incident_Num : int 6509 14471 5372 40328 18450 1168 44273 14..
## $ Incident_Type : chr "Level 1 - Use of Force" "Level 1 - Use "..
## $ Occured_date_time: chr "08/13/2015 01:57:00 AM" "02/13/2017 09:"..
## $ Precinct : chr "North" "North" "East" "South" ...
## $ Sector : chr "NORA" "JOHN" "EDWARD" "OCEAN" ...
## $ Beat : chr "N3" "J2" "E2" "O1" ...
## $ Officer_ID : int 1871 1238 1844 1576 1665 872 1776 1709 10..
## $ Subject_ID : int 5370 11804 4495 19125 14495 1127 20044 12..
## $ Subject_Race : chr "Black or African American" "Black or Af"..
## $ Subject_Gender : chr "Female" "Male" "Female" "Female" ...
# Preview the first 20 precinct labels; placeholder codes such as "X" and "-"
# show up among the real precinct names.
head(df$Precinct, 20)
## [1] "North" "North" "East" "South" "North" "Southwest"
## [7] "North" "South" "North" "X" "-" "East"
## [13] "East" "North" "East" "North" "West" "West"
## [19] "-" "South"
# Count incidents per precinct label. `useNA = "ifany"` states explicitly that
# missing values get their own cell when present; passing a dummy string to
# `exclude` has the same effect only as long as no category carries that
# exact label, which would then be dropped silently.
absoluteT <- table(df$Precinct, useNA = "ifany")
absoluteT
##
## - 0 East North OOJ South Southwest West
## 583 11 2962 3419 58 2532 857 4327
## X
## 371
# Share of all incidents that falls under each precinct label.
print(prop.table(absoluteT))
##
## - 0 East North OOJ South
## 0.0385582011 0.0007275132 0.1958994709 0.2261243386 0.0038359788 0.1674603175
## Southwest West X
## 0.0566798942 0.2861772487 0.0245370370
# The same shares expressed as percentages.
propT <- 100 * prop.table(absoluteT)
propT
##
## - 0 East North OOJ South
## 3.85582011 0.07275132 19.58994709 22.61243386 0.38359788 16.74603175
## Southwest West X
## 5.66798942 28.61772487 2.45370370
# Collapse the placeholder precinct codes ("X", "0", "-") into one "Unknown"
# category. The recode is applied to the data itself so that the rebuilt
# table merges the placeholders into a single cell; relabelling table cells
# by position would only yield three separate cells sharing one name.
dfCleaned <- df
dfCleaned$Precinct[dfCleaned$Precinct %in% c("X", "0", "-")] <- "Unknown"
dfCleaned
# Rebuild the counts from the cleaned column. `useNA = "ifany"` keeps missing
# values as their own cell when present.
absoluteT <- table(dfCleaned$Precinct, useNA = "ifany")
# Echo the rebuilt counts, then the share per precinct.
absoluteT
prop.table(absoluteT)
##
## East North OOJ South Southwest Unknown
## 0.195899471 0.226124339 0.003835979 0.167460317 0.056679894 0.063822751
## West
## 0.286177249
# Percentages for the current precinct counts.
propT <- 100 * prop.table(absoluteT)
propT
##
## East North OOJ South Southwest Unknown West
## 19.5899471 22.6124339 0.3835979 16.7460317 5.6679894 6.3822751 28.6177249
# Convert the count table to a data frame and echo it once with its default
# column names before renaming them.
tableFreq <- as.data.frame(absoluteT)
print(tableFreq)
names(tableFreq) <- c("Precinct", "Count")
# Attach the percentages as a bare vector (table attributes stripped).
tableFreq[["Percent"]] <- as.vector(propT)
tableFreq
library(ggplot2)
# Base layer: precinct on the x axis, incident count on the y axis.
base <- ggplot(
  data = tableFreq,
  mapping = aes(x = Precinct, y = Count)
)
# One gray bar per precinct; stat = "identity" takes the bar height from Count.
plot1 <- base + geom_bar(stat = "identity", fill = "gray")
plot1

# Title and caption text; both axis titles are removed.
titleText <- "How frequently do police precincts in Seattle use force?"
sourceText <- "Source: City of Seattle"
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Sort the rows from the smallest to the largest percentage.
tableFreq <- tableFreq[order(tableFreq[["Percent"]]), ]
# then:
tableFreq
# Precinct labels in ascending-percentage order. The rows are already sorted
# at this point, so the column can be read off directly.
PrecinctOrd <- tableFreq[["Precinct"]]
print(PrecinctOrd)
## [1] OOJ Southwest Unknown South East North West
## Levels: East North OOJ South Southwest Unknown West
# plot2 has not been rebuilt since the sort; this re-displays it as is.
plot2

# Fix the x-axis order to PrecinctOrd and switch to the classic theme.
base <- base +
  scale_x_discrete(limits = PrecinctOrd) +
  theme_classic()
##
# Rebuild the bar layer and the labelled plot on top of the updated base.
plot1 <- base + geom_bar(stat = "identity", fill = "gray")
plot2 <- plot1 +
  labs(title = titleText, caption = sourceText, x = NULL, y = NULL)
plot2

# Compact per-column overview of the raw data, each line cut at 50 characters.
str(df, width = 50, strict.width = "cut")
## 'data.frame': 15120 obs. of 11 variables:
## $ ID : chr "2015UOF-1456-1964-5"..
## $ Incident_Num : int 6509 14471 5372 40328..
## $ Incident_Type : chr "Level 1 - Use of Fo"..
## $ Occured_date_time: chr "08/13/2015 01:57:00"..
## $ Precinct : chr "North" "North" "Eas"..
## $ Sector : chr "NORA" "JOHN" "EDWAR"..
## $ Beat : chr "N3" "J2" "E2" "O1" ...
## $ Officer_ID : int 1871 1238 1844 1576 1..
## $ Subject_ID : int 5370 11804 4495 19125..
## $ Subject_Race : chr "Black or African Am"..
## $ Subject_Gender : chr "Female" "Male" "Fem"..
# Parse the timestamp strings (e.g. "08/13/2015 01:57:00 AM") into Date values.
# The year is written with four digits, so the format needs %Y; %y would read
# only the first two digits ("20") and place every incident in the year 2020.
# as.Date() keeps the calendar date only; the time-of-day part is not retained.
dfCleaned[["Occured_date_time"]] <- as.Date(
  dfCleaned[["Occured_date_time"]],
  format = "%m/%d/%Y"
)
dfCleaned
# Histogram of incident dates: bars filled by precinct and outlined in black.
baseHY <- ggplot(data = dfCleaned, mapping = aes(x = Occured_date_time))
histHY <- baseHY +
  geom_histogram(mapping = aes(fill = Precinct), color = "black")
histHY
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
